{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from skimage.color import rgb2gray\n",
    "from PIL import Image\n",
    "import tensorflow as tf\n",
    "from skimage.filters import threshold_otsu\n",
    "import keras_ocr\n",
    "import gc\n",
    "from numba import njit\n",
    "from keras.models import load_model\n",
    "import keras\n",
    "from tensorflow.keras.layers.experimental.preprocessing import StringLookup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def denoise(img):\n",
    "    image_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)\n",
    "    dst = cv2.fastNlMeansDenoising(image_gray, h=25, templateWindowSize=15, searchWindowSize=35)\n",
    "    return dst\n",
    "\n",
    "def resize(img):\n",
    "    width, height = img.size\n",
    "    if width > 2000 or height > 2000:\n",
    "        if width > height:\n",
    "            ratio = 2000 / width\n",
    "        else:\n",
    "            ratio = 2000 / height\n",
    "        img = img.resize((int(width * ratio), int(height * ratio)), Image.LANCZOS)\n",
    "    return img\n",
    "\n",
    "def preproces_image(image, *, kernel_size=15, crop_side=50, blocksize=35, constant=15, max_value=255):\n",
    "    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
    "    bit = cv2.bitwise_not(gray)\n",
    "    image_adapted = cv2.adaptiveThreshold(\n",
    "        src=bit,\n",
    "        maxValue=max_value,\n",
    "        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,\n",
    "        thresholdType=cv2.THRESH_BINARY,\n",
    "        blockSize=blocksize,\n",
    "        C=constant,\n",
    "    )\n",
    "    kernel = np.ones((kernel_size, kernel_size), np.uint8)\n",
    "    erosion = cv2.erode(image_adapted, kernel, iterations=2)\n",
    "    return erosion[crop_side:-crop_side, crop_side:-crop_side]\n",
    "\n",
    "def find_edges(image_preprocessed, *, bw_threshold=150, limits=(0.2, 0.15)):\n",
    "    mask = image_preprocessed < bw_threshold\n",
    "    edges = []\n",
    "    for axis in (1, 0):\n",
    "        count = mask.sum(axis=axis)\n",
    "        limit = limits[axis] * image_preprocessed.shape[axis]\n",
    "        index_ = np.where(count >= limit)\n",
    "        _min, _max = index_[0][0], index_[0][-1]\n",
    "        edges.append((_min, _max))\n",
    "    return edges\n",
    "\n",
    "def adapt_edges(edges, *, height, width):\n",
    "    MAX_EDGE_EXTENSION = 100\n",
    "    EDGE_EXTENSION_FACTOR = 10\n",
    "    EDGE_EXTENSION_DIVISOR = 11\n",
    "\n",
    "    (x_min, x_max), (y_min, y_max) = edges\n",
    "    x_min2 = x_min\n",
    "    x_max2 = x_max + min(MAX_EDGE_EXTENSION, (height - x_max) * EDGE_EXTENSION_FACTOR // EDGE_EXTENSION_DIVISOR)\n",
    "    y_min2 = max(0, y_min)\n",
    "    y_max2 = y_max + min(MAX_EDGE_EXTENSION, (width - y_max) * EDGE_EXTENSION_FACTOR // EDGE_EXTENSION_DIVISOR)\n",
    "    return (x_min2, x_max2), (y_min2, y_max2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocess Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "image = cv2.imread('trial.tif')\n",
    "image = Image.fromarray(image)\n",
    "# Resize the image\n",
    "image = resize(image)\n",
    "image = np.array(image)\n",
    "\n",
    "# Preprocess the image\n",
    "\n",
    "height, width = image.shape[0:2]\n",
    "image_preprocessed = preproces_image(image)\n",
    "edges = find_edges(image_preprocessed)\n",
    "(x_min, x_max), (y_min, y_max) = adapt_edges(edges, height=height, width=width)\n",
    "image_cropped = image[x_min:x_max, y_min:y_max]\n",
    "# img_denoised = denoise(img_cropped)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Detect Words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking for C:\\Users\\Ayman\\.keras-ocr\\craft_mlt_25k.h5\n",
      "1/1 [==============================] - 2s 2s/step\n"
     ]
    }
   ],
   "source": [
    "words_array = []\n",
    "try:\n",
    "        detector = keras_ocr.detection.Detector(weights='clovaai_general')\n",
    "        boxes = detector.detect(images=[image])[0]\n",
    "        for i, box in enumerate(boxes):\n",
    "                cropped_img = keras_ocr.tools.warpBox(image=image, box=box)\n",
    "                words_array.append(cropped_img)\n",
    "except:\n",
    "        print('error')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Edge Enhancement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#image contrast enhancement\n",
    "def grayscalize(img):\n",
    "    #check if its 3 channel or grayscale, based on that convert to grayscale\n",
    "    if img.ndim > 2: # is this is a rgb/rgba image\n",
    "        img = rgb2gray(img)\n",
    "    return img\n",
    "\n",
    "def binarize_image(image):\n",
    "    threshold = threshold_otsu(image)\n",
    "    return image < threshold\n",
    "\n",
    "\n",
    "def denoise_binary_image(binary_image, kernel_size=5):\n",
    "    # Define a kernel for morphological operations\n",
    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))\n",
    "\n",
    "    # Perform erosion followed by dilation to remove noise\n",
    "    denoised_image = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, kernel)\n",
    "\n",
    "    return denoised_image\n",
    "\n",
    "def thresholding(image, threshold, typee='Binary', param1=0, param2=0):\n",
    "    # A function to apply intensity thresholding to a grey-scale image\n",
    "    # The thresholding could be simple binary thresholding or adaptive gaussian thresholding\n",
    "    # If the type is not set to 'Binary' then the parameters for adaptive thresholdinf must\n",
    "    # be used which are:\n",
    "    #param1: local region size ( preferably an odd number)\n",
    "    #param2: constant to be added to local mean\n",
    "    if(typee.lower()=='binary'):\n",
    "        ret, thresh= cv2.threshold(image,threshold,255,cv2.THRESH_BINARY_INV)\n",
    "    else:\n",
    "        thresh = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,param1,param2)\n",
    "    return thresh\n",
    "\n",
    "def edg_enhance(image):\n",
    "    denoised = denoise_binary_image(image)\n",
    "    gray = grayscalize(image)\n",
    "    binarized_image = binarize_image(gray)\n",
    "    # thresh = thresholding(denoised, 127, typee='Binary')\n",
    "    #convert image data type to uint8\n",
    "    binarized_image_uint8 = (binarized_image * 255).astype(np.uint8)\n",
    "    \n",
    "    return binarized_image_uint8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "words_array = [edg_enhance(image) for image in words_array]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Island Segmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def directionalHistogram(img, direction='H'):\n",
    "    (w,h) = img.shape\n",
    "    pixel_count = 0\n",
    "    if(direction=='H'):\n",
    "        return [img[j].tolist().count(255) for j in range(w-1)]\n",
    "    else:\n",
    "        return [img[:,j].tolist().count(255) for j in range(h-1)]\n",
    "\n",
    "def cropLineToWords(viable_sequences, image):\n",
    "    (w,h) = image.shape\n",
    "    words = [image[0:w-1, viable_sequences[i-1]:viable_sequences[i]] for i in range(1,len(viable_sequences))]\n",
    "    words.append(image[0:w-1, viable_sequences[-1]:h-1])\n",
    "    return words\n",
    "\n",
    "def removeSpaces(words):\n",
    "    return [word for word in words if np.sum(word[:,:]>0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "segmented_words = []\n",
    "for image in words_array:\n",
    "    words = []\n",
    "    (w,h) = image.shape\n",
    "    hist_vertical = directionalHistogram(image, direction='V')\n",
    "    zero_sites = np.where(np.asarray(hist_vertical)==0)[0]\n",
    "    sequences = [[zero_sites[i-1], zero_sites[i]] for i in range(1, len(zero_sites)) if zero_sites[i] != zero_sites[i-1] + 1]\n",
    "    if not sequences:\n",
    "        continue\n",
    "    sequence_lengths = [seq[1] - seq[0] + 1 for seq in sequences]\n",
    "    average_sequence_length = sum(sequence_lengths[1:-1]) / len(sequences)\n",
    "    overlap_factor = 0.75 * average_sequence_length\n",
    "    viable_sequences_unrolled = [seq[0] for seq in sequences if seq[1] - seq[0] + 1 >= average_sequence_length - overlap_factor] + [-1]\n",
    "    if viable_sequences_unrolled[0] != 0:\n",
    "        viable_sequences_unrolled = [0] + viable_sequences_unrolled\n",
    "    words.append(cropLineToWords(viable_sequences_unrolled, image))\n",
    "    ordered_words = [word if np.sum(word[:,:]) else 'space' for word in words[0]]\n",
    "    for ordered_word in ordered_words:\n",
    "        if not isinstance(ordered_word, str):\n",
    "            segmented_words.append(ordered_word)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cleaning Unnecessary Segments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clean_segment(image):\n",
    "    avg = np.average(image)\n",
    "    \n",
    "    if image.shape[1] < 10 and avg < 0.5:\n",
    "        return 'space'\n",
    "    elif avg > 0.15:\n",
    "        return image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "segmented_words = [clean_segment(image) for image in segmented_words]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# remove spaces\n",
    "segmented_words = [word for word in segmented_words if not isinstance(word, str)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Padding The Segments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def directionalHistogram(img, direction='H'):\n",
    "    (w,h) = img.shape\n",
    "    if(direction=='H'):\n",
    "        return [img[j].tolist().count(255) for j in range(w-1)]\n",
    "    else:\n",
    "        return [img[:,j].tolist().count(255) for j in range(h-1)]\n",
    "\n",
    "def crop_image(image, direction='H'):\n",
    "    w,h=image.shape\n",
    "    if(w<10 or h<10):\n",
    "        return image\n",
    "    hist=directionalHistogram(image, direction )\n",
    "    flipped_hist=np.flip(hist)\n",
    "    try:\n",
    "        startpos = next(i for i in range(1, len(hist)-1) if hist[i-1]==0 and hist[i]==0 and hist[i+1]!=0)\n",
    "        endpos = len(flipped_hist)-1 - next(i for i in range(1, len(flipped_hist)-1) if flipped_hist[i-1]==0 and flipped_hist[i]==0 and flipped_hist[i+1]!=0)\n",
    "    except StopIteration:\n",
    "        return image\n",
    "    cropped_image = image[startpos:endpos,:] if direction == 'H' else image[:,startpos:endpos]\n",
    "    if cropped_image.shape[0] <= 0 or cropped_image.shape[1] <= 0:\n",
    "        return image\n",
    "    return cropped_image\n",
    "\n",
    "image_width = 32\n",
    "image_height = 32\n",
    "def distortion_free_resize(image, img_size=(image_height, image_width)):\n",
    "    h,w= img_size\n",
    "    if h <= 0 or w <= 0:\n",
    "        raise ValueError(\"Image size must be positive\")\n",
    "    if image.shape[0] <= 0 or image.shape[1] <= 0:\n",
    "        return image\n",
    "    # convert to 3-channel image\n",
    "    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)\n",
    "    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)\n",
    "    pad_height = h - tf.shape(image)[0]\n",
    "    pad_width = w - tf.shape(image)[1]\n",
    "    pad_height_top = pad_height // 2 if pad_height % 2 == 0 else pad_height // 2 + 1\n",
    "    pad_width_left = pad_width // 2 if pad_width % 2 == 0 else pad_width // 2 + 1\n",
    "    image = tf.pad(image, paddings=[[pad_height_top, pad_height//2], [pad_width_left, pad_width//2], [0, 0]])\n",
    "    image = tf.transpose(image, perm=[1, 0, 2])\n",
    "    image = tf.image.flip_left_right(image)\n",
    "    return image\n",
    "\n",
    "def perform_padding(image):\n",
    "    image_v_cropped=crop_image(image, 'V')\n",
    "    image=distortion_free_resize(image_v_cropped)\n",
    "    image=np.rot90(image)\n",
    "    return image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "segmented_words = np.array([perform_padding(image) for image in segmented_words])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# scale images\n",
    "segmented_words = segmented_words / 255.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Characters Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "AUTOTUNE = tf.data.AUTOTUNE\n",
    "\n",
    "characters = {'ء','آ','أ','ؤ','إ','ئ','ا','ب','ة','ت','ث','ج','ح','خ','د','ذ','ر','ز','س','ش','ص','ض','ط','ظ','ع','غ','ف','ق','ك','ل','م','ن','ه','و','ى','ي'}\n",
    "max_len = 7\n",
    "# Mapping characters to integers.\n",
    "char_to_num = StringLookup(vocabulary=sorted(list(characters)), mask_token=None)\n",
    "\n",
    "# Mapping integers back to original characters.\n",
    "num_to_char = StringLookup(\n",
    "    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True\n",
    ")\n",
    "\n",
    "def decode_batch_predictions(pred):\n",
    "    input_len = np.ones(pred.shape[0]) * pred.shape[1]\n",
    "    # Use greedy search. For complex tasks, you can use beam search.\n",
    "    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][\n",
    "        :, :max_len\n",
    "    ]\n",
    "    # Iterate over the results and get back the text.\n",
    "    output_text = []\n",
    "    for res in results:\n",
    "        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))\n",
    "        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode(\"utf-8\")\n",
    "        output_text.append(res)\n",
    "    return output_text"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
